clear all
cd "C:\Users\awcassidy1\Dropbox\jmp_new\Survey stuff\qualtrics_final_survey_data"

import delimited "Observability of Energy Efficiency in Real Estate Transactions_August 30, 2021_10.13.csv", clear

*need to change some commas in quotes in line 3 of the data which messes the rest of the data up.
filefilter "Observability of Energy Efficiency in Real Estate Transactions_August 30, 2021_10.13.csv" "temp.csv", from(Date\Q\Q,\Q\Qtime) to(QQ) replace



if c(version)<16 {
	import delimited "temp.csv", bindquote(strict) encoding(utf8) clear delimiters(comma)
	}

if c(version)>=16 {
	import delimited "temp.csv", bindquote(strict) encoding(utf8) clear delimiters(comma) maxquotedrows(unlimited)	
	}


*delete the temporary csv we created.
cap rm "temp.csv"

*** Dropping one row of irrelevant data
drop if _n == 3

*** Duplicating and creating Qualtrics generated variables
gen double start_time = Clock(v1, "YMD hms") if v1 != "StartDate" & v1 != "Start Date"
format start_time %tCCCYY-NN-DD_HH:MM:SS
la var start_time "StartTimeQualtrics"

gen double end_time = Clock(v2, "YMD hms") if v2 != "EndDate" & v2 != "End Date"
format end_time %tCCCYY-NN-DD_HH:MM:SS
la var end_time "EndTimeQualtrics"

gen response_type = v3 if v3 != "Status" & v3 != "Response Type"
la var response_type "SurveyPreview/IP"

replace v4 = "" if v4 == "Progress"
destring v4, gen(progress)
la var progress "Survey Progress"

replace v5 = "" if v5 == "Duration (in seconds)"
destring v5, gen(duration)
la var duration "Response duration in seconds"

replace v6 = "" if v6 == "Finished"
gen completion = 0 if v6 == "False"
replace completion = 1 if v6 == "True"
la var completion "Survey Completed"

gen double record_date = Clock(v7, "YMD hms") if v8 != "Recorded Date" & v7 != "RecordedDate" 
format record_date %tCCCYY-NN-DD_HH:MM:SS
la var record_date "TimeResponseRecorded"

replace v8 = "" if v8 == "ResponseId" | v8 == "Response ID"
gen qualtrics_id = v8
la var qualtrics_id "Qualtrics Response ID"

gen dist_channel = v9 if v9 != "DistributionChannel" & v9 != "Distribution Channel"
la var dist_channel "Distribution Channel"

gen qualtrics_language = v10 if v10 != "User Language" & v10 != "UserLanguage"
la var qualtrics_language "Qualtrics Language"


gen consent = v11 if v11 != "Informed Consent" & v11 != "InformedConsent"
la var consent "Informed Consent"


gen realtor_type = v12 if v12 != "Realtor Type" & v12 != "Are the majority of the real estate transactions you are involved in residential or commercial?"
la var realtor_type "Realtor Type"

*drop those responses before survey sent out (trial responses)

gen date=dofc(start_time)
drop if date<date("8/9/2021","MDY")
	
forval y=13/29 {
	replace v`y'=subinstr(v`y',"What percent of buyers observe each energy efficiency feature before purchasing a home? - ","",.)
	local text=v`y'[2]
	di "`text'"
	la var v`y' "`text'"
	
	}
	
*don't need the labels anymore
drop if _n<=2

forval y=13/29 {
	destring v`y', replace
	rename v`y' feature_`y'
}

drop v*


sum feature_*

forval y=13/29 {
	gen non_miss_feature_`y'=(!missing(feature_`y'))
	*50 is the default and means they never moved the slider.
	replace feature_`y'=50 if missing(feature_`y')
	}
	
egen num_nonmissing=rowtotal(non_miss_feature_*)

tab num_nonmissing


rename feature_13 feature_furnacefueltype
rename feature_14 feature_eeravg
rename feature_15 feature_ductravg
rename feature_16 feature_atticrvalue
rename feature_17 feature_negleakavg
rename feature_18 feature_sizeavg
rename feature_19 feature_ductsystem1type
rename feature_20 feature_notraddr
rename feature_21 feature_notwinrec
rename feature_22 feature_ah_location
rename feature_23 feature_ah_type
rename feature_24 feature_negsysageavg
rename feature_25 feature_progtherm
rename feature_26 feature_rsavg
rename feature_27 feature_waterheatertanktype
rename feature_28 feature_waterheaterfueltype
rename feature_29 feature_twosystems

local k=13
	foreach var of varlist feature_* {
		local stub=subinstr("`var'", "feature_","",.)
		rename `var' `stub'
		rename non_miss_feature_`k' non_miss_`stub'
		local k=`k'+1
		}

	gen id=_n
	 


	local auditvars waterheaterfueltype waterheatertanktype atticrvalue furnacefueltype ///
		eeravg negleakavg ///
		rsavg ductravg negsysageavg ///
		  ductsystem1type sizeavg ///
		progtherm ah_location ah_type

	la var negleakavg "$-$ \% Duct Leakage"
	la var waterheatertanktype "Tankless or Solar WH"
	la var ductsystem1type "Metal Ducts"
	la var twosystems "2 Systems"
	la var furnacefueltype "Gas Furnace"
	la var ah_type "Vertical AH"
	la var waterheaterfueltype "Gas WH"
	la var ah_location "AH in Closet"

	la var eeravg "EER"
	la var negleakavg "$-$ \% Duct Leakage"
	la var atticrvalue "Attic R-value"

	la var rsavg "Return Sizing Adequate"
	la var ductravg "Duct R-value"

	la var negsysageavg "$-$ System Age"

	la var sizeavg "HVAC Size (sqft/ton)"
	la var notwinrec "Did Not Rec WS"

	la var progtherm "Programmable Thermostat"

	la var notraddr "Did Not Rec Att. Ins"

	local othervars "twosystems ah_type notwinrec notraddr waterheatertanktype "

	local auditvars_in_order  ductravg ductsystem1type negleakavg ///
		rsavg atticrvalue eeravg ///
		ah_location sizeavg progtherm ///
		furnacefueltype waterheaterfueltype negsysageavg
		
	local allvars_in_order  ductravg ah_type ductsystem1type negleakavg ///
		rsavg notwinrec  atticrvalue  eeravg ///
		ah_location sizeavg progtherm notraddr ///
		furnacefueltype waterheaterfueltype waterheatertanktype   twosystems negsysageavg



*Filter on consent
drop if consent!="I agree to participate"
*filter on residential
drop if realtor_type=="Commercial"
*filter on completion
drop if completion==0

drop if num_nonmissing==0

save "C:\Users\awcassidy1\Dropbox\jmp_new/cleaned_data/survey_data.dta", replace


********************************************************************************
*Summary of all features
********************************************************************************
		
		
eststo clear

	qui estpost sum ///
		 `allvars_in_order' , detail
		 
	esttab using "C:\Users\awcassidy1\Dropbox\jmp_new/tables/sum_survey_all.tex", fragment replace ///
		cells((mean(label(\multicolumn{1}{c}{}) fmt(2)) ///
		sd(label(\multicolumn{1}{c}{}) fmt(2)) ///
		p50(label(\multicolumn{1}{c}{}) fmt(0)) ///
		min(label(\multicolumn{1}{c}{}) fmt(0)) ///
		max(label(\multicolumn{1}{c}{}) fmt(0)) ///
		count(label(\multicolumn{1}{c}{}) fmt(0)))) ///
		label booktabs nonum noobs ///
		nolines nomtitles
cap drop id_mean
cap drop *_tilde
egen id_mean=rowmean(`allvars_in_order')
local tildevars=""
foreach var in `allvars_in_order'{
	gen `var'_tilde=`var'-id_mean
	la var `var'_tilde "`: var label `var''"
	local tildevars="`tildevars' `var'_tilde"
	}
	

	*check that using demeaned data doesn't change much.
	eststo clear

	qui estpost sum ///
		`tildevars'
		 
	esttab using "C:\Users\awcassidy1\Dropbox\jmp_new/tables/sum_survey_demeaned_all.tex", fragment replace ///
		cells((mean(label(\multicolumn{1}{c}{}) fmt(2)) ///
		sd(label(\multicolumn{1}{c}{}) fmt(2)) ///
		min(label(\multicolumn{1}{c}{}) fmt(0)) ///
		max(label(\multicolumn{1}{c}{}) fmt(0)) ///
		count(label(\multicolumn{1}{c}{}) fmt(0)))) ///
		label booktabs nonum noobs ///
		nolines nomtitles
		
****************************************************************************
*OUTPUT data for other programs
****************************************************************************
	
		
foreach v of varlist `auditvars_in_order' `othervars' {
	gen info_se_`v' = `v'
	gen info_mean_`v' = `v'
	}
		
collapse (mean) info_mean_* (semean) info_se_*

*informationless variable for merge.
gen n=1

*now there are some that need to be added because same informational content
gen info_mean_negrrec= info_mean_notraddr
gen info_se_negrrec=info_se_notraddr

gen info_mean_negwinrec= info_mean_notwinrec
gen info_se_negwinrec=info_se_notwinrec


save "C:\Users\awcassidy1\Dropbox\jmp_new/cleaned_data/survey_sum.dta", replace
		




